Used libraries:

library(mosaic)
## Registered S3 method overwritten by 'mosaic':
##   method                           from   
##   fortify.SpatialPolygonsDataFrame ggplot2
## 
## The 'mosaic' package masks several functions from core packages in order to add 
## additional features.  The original behavior of these functions should not be affected by this.
## 
## Attache Paket: 'mosaic'
## Die folgenden Objekte sind maskiert von 'package:dplyr':
## 
##     count, do, tally
## Das folgende Objekt ist maskiert 'package:Matrix':
## 
##     mean
## Das folgende Objekt ist maskiert 'package:ggplot2':
## 
##     stat
## Die folgenden Objekte sind maskiert von 'package:stats':
## 
##     binom.test, cor, cor.test, cov, fivenum, IQR, median, prop.test,
##     quantile, sd, t.test, var
## Die folgenden Objekte sind maskiert von 'package:base':
## 
##     max, mean, min, prod, range, sample, sum
library(plotly)
## 
## Attache Paket: 'plotly'
## Das folgende Objekt ist maskiert 'package:mosaic':
## 
##     do
## Das folgende Objekt ist maskiert 'package:ggplot2':
## 
##     last_plot
## Das folgende Objekt ist maskiert 'package:stats':
## 
##     filter
## Das folgende Objekt ist maskiert 'package:graphics':
## 
##     layout
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library(dplyr)
library(rpart)
library(caret)
## 
## Attache Paket: 'caret'
## Das folgende Objekt ist maskiert 'package:mosaic':
## 
##     dotPlot
library(psych)
## 
## Attache Paket: 'psych'
## Die folgenden Objekte sind maskiert von 'package:mosaic':
## 
##     logit, rescale
## Die folgenden Objekte sind maskiert von 'package:ggplot2':
## 
##     %+%, alpha
library(ggplot2)
library(ggcorrplot)
library(rela)

1. Read all samples and combine them

# Read the four activity recordings (idle, running, lunge, siu).
# Note: the workspace is deliberately NOT cleared with rm(list = ls()) here;
# wiping the global environment from inside a script destroys the caller's
# objects and does not give a clean session anyway (packages stay attached).
data_dir <- "03_Lunges_with_Dari"

# read.csv() already returns a data.frame, so no extra data.frame() wrapper
# is needed; the read_* objects are kept for backward compatibility.
read_idle <- read.csv(file.path(data_dir, "01_Idle.csv"))
idle_data <- read_idle

read_run <- read.csv(file.path(data_dir, "02_Running.csv"))
run_data <- read_run

read_lunge <- read.csv(file.path(data_dir, "03_Lunge.csv"))
lunge_data <- read_lunge

read_siu <- read.csv(file.path(data_dir, "04_Siu.csv"))
siu_data <- read_siu

Rename ID correctly:

# Give the first column of every recording the same name ("ID") so that the
# four frames have identical column names before they are stacked.
colnames(idle_data)[1L] <- "ID"
colnames(run_data)[1L] <- "ID"
colnames(lunge_data)[1L] <- "ID"
colnames(siu_data)[1L] <- "ID"

Combined data

Overall in total there are 8985 rows

In the Lunges data, Orientation.X and Orientation.Z have the wrong data type: they are not read as numeric columns.

Consequently, we cannot scale the data before converting these columns to numeric.

# Stack the recordings one after another. The intermediate frames are kept
# because idle_run is inspected on its own right below.
idle_run    <- rbind(idle_data, run_data)
irun_lunge  <- rbind(idle_run, lunge_data)
motion_data <- rbind(irun_lunge, siu_data)
# Coerce Orientation.X of the idle + running subset to numeric and count the
# missing values per column (motion_data itself is not touched here).
idle_run[["Orientation.X"]] <- as.numeric(idle_run[["Orientation.X"]])
colSums(is.na(idle_run))
##                     ID                 Author               Category 
##                      0                      0                      0 
##                 Sample Acceleration.Timestamp         Acceleration.X 
##                      0                      0                      0 
##         Acceleration.Y         Acceleration.Z      AngularVelocity.X 
##                      0                      0                      0 
##      AngularVelocity.Y      AngularVelocity.Z        MagneticField.X 
##                      0                      0                   1499 
##        MagneticField.Y        MagneticField.Z          Orientation.X 
##                   1499                   1499                      0 
##          Orientation.Y          Orientation.Z 
##                      0                      0

2. Do some Exploratory Data Analysis (EDA) on whole data:

# Start the EDA from a copy of the combined recordings.
motion_data_all <- data.frame(motion_data)
# Drop the three magnetometer axes in one step: they contain many NAs.
magnetic_cols <- c("MagneticField.X", "MagneticField.Y", "MagneticField.Z")
motion_data_all <- motion_data_all[, !(names(motion_data_all) %in% magnetic_cols)]

# Give the columns the types the analysis expects.
motion_data_all[["Category"]] <- as.factor(motion_data_all[["Category"]])
motion_data_all[["Acceleration.X"]] <- as.numeric(motion_data_all[["Acceleration.X"]])
## Warning: NAs durch Umwandlung erzeugt
motion_data_all[["Orientation.X"]] <- as.numeric(motion_data_all[["Orientation.X"]])
motion_data_all[["Orientation.Z"]] <- as.numeric(motion_data_all[["Orientation.Z"]])

More NAs were found after the conversion:

# Number of missing values per column after the type conversion.
colSums(is.na(motion_data_all))
##                     ID                 Author               Category 
##                      0                      0                      0 
##                 Sample Acceleration.Timestamp         Acceleration.X 
##                      0                      0                      1 
##         Acceleration.Y         Acceleration.Z      AngularVelocity.X 
##                      0                      0                      1 
##      AngularVelocity.Y      AngularVelocity.Z          Orientation.X 
##                      1                      1                      0 
##          Orientation.Y          Orientation.Z 
##                      0                      0

Remove the NA’s

About 8584 rows left

# Drop every row that has at least one missing value, then re-count the NAs
# per column to confirm that none are left.
motion_data_all <- na.omit(motion_data_all)
colSums(is.na(motion_data_all))
##                     ID                 Author               Category 
##                      0                      0                      0 
##                 Sample Acceleration.Timestamp         Acceleration.X 
##                      0                      0                      0 
##         Acceleration.Y         Acceleration.Z      AngularVelocity.X 
##                      0                      0                      0 
##      AngularVelocity.Y      AngularVelocity.Z          Orientation.X 
##                      0                      0                      0 
##          Orientation.Y          Orientation.Z 
##                      0                      0

Scale the data:

# Copy the cleaned data and add the Euclidean magnitude of the acceleration
# and of the angular velocity as two extra columns.
motion_data_plot <- data.frame(motion_data_all)
motion_data_plot$acceleration <- with(
  motion_data_plot,
  sqrt(Acceleration.X^2 + Acceleration.Y^2 + Acceleration.Z^2)
)
motion_data_plot$angularVelocity <- with(
  motion_data_plot,
  sqrt(AngularVelocity.X^2 + AngularVelocity.Y^2 + AngularVelocity.Z^2)
)

# Columns 2-3 are the categorical variables, columns 6-16 the quantitative
# ones (selected by position, so the column order above must not change).
cat_var <- select(motion_data_plot, 2:3)
quant_var <- scale(select(motion_data_plot, 6:16))

# Centre and scale the quantitative part and glue the labels back on.
motion_data_scale <- cbind(cat_var, quant_var)
motion_data_scale

Train with data from Ahmed, Tobias, Ronaldo, Regan and Darian; Saghar's data is held out for testing.

# Earlier split, kept for reference:
#motion_data_train <- subset(motion_data_all, Author == "Ahmed" | Author == "Tobias" | Author == "Saghar" | Author == "Ronaldo") #+ subset(motion_data_all, Author == "Tobias")
#motion_data_unknown <- subset(motion_data, Author == "Regan" | Author == "Darian") # 33 %

# Split by author: five authors form the training set, Saghar is held out.
train_authors <- c("Ahmed", "Tobias", "Ronaldo", "Regan", "Darian")
motion_data_train <- subset(motion_data_all, Author %in% train_authors)
motion_data_test <- subset(motion_data_all, Author == "Saghar")

# Frame used for the descriptive statistics and plots (keeps all columns).
motion_data_all_stat <- data.frame(motion_data_plot)

# Strip the bookkeeping columns from the modelling frame.
bookkeeping_cols <- c("ID", "Acceleration.Timestamp", "Author", "Sample")
motion_data_all <- motion_data_all[, !(names(motion_data_all) %in% bookkeeping_cols)]
#motion_data_plot <- motion_data_plot[,!names(motion_data_plot) %in% c("ID", "Acceleration.Timestamp", "Author", "Sample")]

Write merged cleaned data to file:

# Write the merged, cleaned frame to "All Samples Clean.csv" in the working
# directory, without a row-name column.
write.csv(motion_data_all, "All Samples Clean.csv", row.names = FALSE)

Category data distribution

Stacked bar chart:

It seems that Darian and Ahmed contributed more motion data than the other authors.

# Number of rows per author and category.
cat_count <- motion_data_all_stat %>%
  group_by(Author, Category) %>%
  summarize(count = n())
## `summarise()` has grouped output by 'Author'. You can override using the
## `.groups` argument.
# Stacked bar chart: one bar per author, segments coloured by category.
# geom_col() draws the pre-computed counts as they are.
stack_bar <- ggplot(cat_count, aes(x = Author, y = count, fill = Category)) +
  geom_col()
# Optional count labels (disabled):
#  + geom_text(aes(label = count), vjust = -4.5)

ggplotly(stack_bar)

Density Plots for acceleration and angular velocity

# Authors to plot (the vector is named `categories` for historical reasons).
categories <- c("Tobias", "Saghar", "Ronaldo", "Regan", "Ahmed", "Darian")

# One density plot of the acceleration magnitude per author, with one
# semi-transparent curve per movement category.
for (author in categories) {
  author_rows <- subset(motion_data_all_stat, Author == author)

  density_plot <- ggplot(author_rows, aes(x = acceleration, fill = Category)) +
    geom_density(alpha = 0.3) +
    facet_grid(. ~ Author)
  # Disabled alternatives:
  #   geom_histogram(bins=(sqrt(length(cat_count$Category))),fill="white",color="black",aes(y=..density..))
  #   scale_x_continuous(limits=c(-2.5, 8), expand=c(0,0))

  # Plots created inside a loop are not auto-printed.
  print(density_plot)
}

# Authors to plot (the vector is named `categories` for historical reasons).
categories <- c("Tobias", "Saghar", "Ronaldo", "Regan", "Ahmed", "Darian")
# Movement classes to show; "Idle" is deliberately left out.
moving_categories <- c("Lunges", "Running", "Siu")

# One density plot of the acceleration magnitude per author, restricted to
# the three movement classes.
for (i in categories) {
  # Use %in% for set membership. Comparing with `==` against a length-3
  # vector recycles that vector along the rows, which keeps only about a
  # third of the matching rows and raises "longer object length is not a
  # multiple of shorter object length" warnings.
  test <- subset(
    motion_data_all_stat,
    Author == i & Category %in% moving_categories
  )

  plot <- ggplot(test, aes(x = acceleration, fill = Category)) +
    #geom_histogram(bins=(sqrt(length(cat_count$Category))),fill="white",color="black",aes(y=..density..)) +
    geom_density(alpha = 0.3) +
    facet_grid(. ~ Author)
    #scale_x_continuous(limits=c(-2.5, 8), expand=c(0,0))

  # Plots created inside a loop are not auto-printed.
  print(plot)
}
## Warning in `==.default`(Category, c("Lunges", "Running", "Siu")): Länge des längeren Objektes
##       ist kein Vielfaches der Länge des kürzeren Objektes
## Warning in is.na(e1) | is.na(e2): Länge des längeren Objektes
##       ist kein Vielfaches der Länge des kürzeren Objektes
## Warning in `==.default`(Category, c("Lunges", "Running", "Siu")): Länge des längeren Objektes
##       ist kein Vielfaches der Länge des kürzeren Objektes
## Warning in is.na(e1) | is.na(e2): Länge des längeren Objektes
##       ist kein Vielfaches der Länge des kürzeren Objektes

## Warning in `==.default`(Category, c("Lunges", "Running", "Siu")): Länge des längeren Objektes
##       ist kein Vielfaches der Länge des kürzeren Objektes

## Warning in `==.default`(Category, c("Lunges", "Running", "Siu")): Länge des längeren Objektes
##       ist kein Vielfaches der Länge des kürzeren Objektes

## Warning in `==.default`(Category, c("Lunges", "Running", "Siu")): Länge des längeren Objektes
##       ist kein Vielfaches der Länge des kürzeren Objektes

## Warning in `==.default`(Category, c("Lunges", "Running", "Siu")): Länge des längeren Objektes
##       ist kein Vielfaches der Länge des kürzeren Objektes

## Warning in `==.default`(Category, c("Lunges", "Running", "Siu")): Länge des längeren Objektes
##       ist kein Vielfaches der Länge des kürzeren Objektes

## Warning in `==.default`(Category, c("Lunges", "Running", "Siu")): Länge des längeren Objektes
##       ist kein Vielfaches der Länge des kürzeren Objektes

## Warning in `==.default`(Category, c("Lunges", "Running", "Siu")): Länge des längeren Objektes
##       ist kein Vielfaches der Länge des kürzeren Objektes

## Warning in `==.default`(Category, c("Lunges", "Running", "Siu")): Länge des längeren Objektes
##       ist kein Vielfaches der Länge des kürzeren Objektes

Correlation plot for numerical values:

Threshold: 0.2

Old one: Remaining features: Acceleration.X, Acceleration.Z, Orientation.X, Orientation.Y, Orientation.Z

New one: Remaining features: Acceleration.X, Acceleration.Y, Acceleration.Z, AngularVelocity.X, AngularVelocity.Y, AngularVelocity.Z

We remove the orientation, since everyone had a different phone position

# Numeric part of the cleaned data: everything except the class label.
motion_data_all_numeric <- data.frame(motion_data_all)
numeric_cols <- setdiff(names(motion_data_all_numeric), "Category")
motion_data_all_numeric <- motion_data_all_numeric[, numeric_cols]

#Was for only for testing -> Darian: Everyone has different position of phone, thats why we should skip Orientation
#motion_data_all_numeric <- motion_data_all_numeric[,!names(motion_data_all_numeric) %in% c("Orientation.X", "Orientation.Y", "Orientation.Z")]
#motion_data_all_numeric$Category <- as.numeric(factor(motion_data_all_numeric$Category))
#motion_data_all_numeric$Category <- as.factor(motion_data_all_numeric$Category)
motion_data_all_numeric

# Pairwise correlations between all numeric columns.
cor_matrix <- cor(motion_data_all_numeric)

# Lower triangle as circles, ordered by hierarchical clustering, with the
# coefficients printed on top.
plot <- ggcorrplot(
  cor_matrix,
  method = "circle",
  type = "lower",
  hc.order = TRUE,
  lab = TRUE,
  lab_size = 3
)

ggplotly(plot)
## Warning in L$marker$color[idx] <- aes2plotly(data, params, "fill")[idx]: Anzahl
## der zu ersetzenden Elemente ist kein Vielfaches der Ersetzungslänge

## Warning in L$marker$color[idx] <- aes2plotly(data, params, "fill")[idx]: Anzahl
## der zu ersetzenden Elemente ist kein Vielfaches der Ersetzungslänge

## Warning in L$marker$color[idx] <- aes2plotly(data, params, "fill")[idx]: Anzahl
## der zu ersetzenden Elemente ist kein Vielfaches der Ersetzungslänge

## Warning in L$marker$color[idx] <- aes2plotly(data, params, "fill")[idx]: Anzahl
## der zu ersetzenden Elemente ist kein Vielfaches der Ersetzungslänge

## Warning in L$marker$color[idx] <- aes2plotly(data, params, "fill")[idx]: Anzahl
## der zu ersetzenden Elemente ist kein Vielfaches der Ersetzungslänge

We use only relevant columns for the model training - So only numeric ones and the category

# Columns that are not used as model inputs.
remove_col <- c("ID",  "Acceleration.Timestamp", "Author", "Sample", "Orientation.X", "Orientation.Y", "Orientation.Z")

# Reduce the training frame to the class label plus the numeric features.
motion_data_train <- motion_data_train[, !names(motion_data_train) %in% remove_col]

# Same reduction on a copy of the held-out (Saghar) data, used for plotting.
plot_test <- data.frame(motion_data_test)
plot_test <- plot_test[, !names(plot_test) %in% remove_col]

# Numeric columns of the plotting frame. The name is historical: despite
# "train" it is derived from the test data (plot_test).
motion_data_train_numeric <- data.frame(plot_test)
motion_data_train_numeric <- motion_data_train_numeric[, !names(motion_data_train_numeric) %in% c("Category")]
#idle_tobias <- subset(motion_data_tobias[1:5], Category == "Idle")

# Pair plot of the numeric features, coloured by category. The colour is
# mapped to a column of the plotted data frame (aes(color = Category))
# instead of reaching into another object with `$`, so data and colour can
# never get out of sync; `columns` keeps Category out of the panels.
plot <- ggpairs(
  data = plot_test,
  columns = names(motion_data_train_numeric),
  mapping = aes(color = Category),
  title = "Motion pair plot with quantitative variables for Saghar",
  upper = list(
    continuous = wrap("cor", size = 2.75)
  )
)

plot

# Saghar's rows of the statistics frame (includes the two magnitude columns).
saghar_stat <- subset(motion_data_all_stat, Author == "Saghar")
# The two magnitude columns on their own, kept for backward compatibility.
corr_data <- select(saghar_stat, c("acceleration", "angularVelocity"))
#idle_tobias <- subset(motion_data_tobias[1:5], Category == "Idle")

# Pair plot of the two magnitudes, coloured by category. The category is
# taken from the same rows that are plotted; the previous version borrowed
# it from plot_test, which only lined up by coincidence of row order.
plot <- ggpairs(
  data = saghar_stat,
  columns = c("acceleration", "angularVelocity"),
  mapping = aes(color = Category),
  title = "Motion pair plot with acceleration and angular velocity",
  upper = list(
    continuous = wrap("cor", size = 2.75)
  )
)

plot

Basic plot of acceleration and angular velocity for scaled data

# Box plots of the six scaled sensor axes.
motion_data_box <- select(motion_data_scale, c("Acceleration.X","Acceleration.Y","Acceleration.Z","AngularVelocity.X","AngularVelocity.Y","AngularVelocity.Z"))
# boxplot() is base graphics, so a ggplot2 theme() cannot be added to it
# (doing so only produced a stray NULL). Rotate the axis labels with the
# base graphics parameter `las` instead (2 = perpendicular to the axis).
boxplot(motion_data_box, las = 2)

## NULL
  #geom_violin(trim = FALSE) +
  #geom_boxplot() 
  #theme_minimal()

Plots from Darian:

#### PAIR PLOT ####
# Work on a copy of the statistics frame; Author and Category are kept
# because the plots further down group by them.
# (A bare `motion_data_box %>% select(...)` used to stand here; its result
# was never assigned, so it only dumped the whole frame to the console.)
motion_data_box <- data.frame(motion_data_all_stat)

# Euclidean magnitude of each three-axis sensor reading.
motion_data_box$Acceleration <- sqrt(motion_data_box$Acceleration.X^2 + motion_data_box$Acceleration.Z^2 + motion_data_box$Acceleration.Y^2)
motion_data_box$Orientation <- sqrt(motion_data_box$Orientation.X^2 + motion_data_box$Orientation.Y^2 + motion_data_box$Orientation.Z^2)
motion_data_box$AngularVelocity <- sqrt(motion_data_box$AngularVelocity.X^2 + motion_data_box$AngularVelocity.Y^2 + motion_data_box$AngularVelocity.Z^2)

# Scatter-plot matrix of the two magnitudes.
pairs(motion_data_box %>% select("Acceleration", "AngularVelocity"))

#### CORRELATION PLOT ####
# psych::corPlot() draws with base graphics, so a ggplot2 theme() cannot be
# added to it (doing so only produced a stray NULL). Its own `xlas` argument
# rotates the x-axis labels (2 = perpendicular to the axis).
corPlot(
  motion_data_box %>% select("Acceleration.X", "Acceleration.Y", "Acceleration.Z", "AngularVelocity.X", "AngularVelocity.Y", "AngularVelocity.Z"),
  xlas = 2
)

## NULL
# Correlation plot of the two magnitude columns only.
corPlot(motion_data_box %>% select("Acceleration", "AngularVelocity"))

#### DENSITY PLOT ####
# Distribution of each movement category per author, one facet per author.
# NOTE(review): x is mapped to Author, which is a discrete column, so
# geom_density() is estimating a density over a categorical axis. Confirm
# this is intended; a bar chart of counts may be what was meant.
ggplot(motion_data_box, aes(x = Author, fill = Category)) +
  geom_density() +
  facet_wrap(~Author, ncol = 3) +
  ggtitle("Distribution of Movements") +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90)
  )

# geom_vline(data=mu, aes(xintercept=grp.mean, color=sex),
# linetype="dashed")


#### BOXPLOT ####
# AngularVelocity.X per author, split by category; outliers are drawn as
# large red open circles.
ggplot(
  motion_data_box,
  aes(x = Author, y = AngularVelocity.X, fill = Category)
) +
  geom_boxplot(outlier.colour = "red", outlier.shape = 1, outlier.size = 4) +
  labs(title = "Boxplot of AngularVelocity.X w.r.t Authors") +
  theme(plot.title = element_text(hjust = 0.5))

Try to plot the timestamp:

#library(lubridate)
#density_data <- data.frame(motion_data_all_stat)

# convert character to POSIXct
#density_data$timestamp <- as.POSIXct(density_data$Acceleration.Timestamp/1000, origin="1970-01-01")
# extract hour and minute:
#density_data$time <- hms::hms(second(density_data$Acceleration.Timestamp), minute(density_data$Acceleration.Timestamp), hour(density_data$Acceleration.Timestamp))  
# convert to POSIXct again since ggplot does not work with class hms.
#density_data$time <- as.POSIXct(density_data$time)
#density_data$date <-as.Date(as.POSIXct(density_data$Acceleration.Timestamp, origin="1970-01-01"))
#density_data
# Visualize the acceleration measurements over time
#plot(motion_data_all_test$Acceleration.Timestamp, motion_data_all_test$Acceleration.Y, type = "l")
#lines(motion_data_all_test$Acceleration.Timestamp, motion_data_all_test$Acceleration.Z, col = "red")
#legend("topright", c( "Acceleration Y", "Acceleration Z"), lty = 1, col = c("black", "red"))
#run_plot <- group_by(run_activity, Activity) %>%
##  ggplot(aes(x=timestamp)) +
#  labs( x = "Timestamp", y = "Acceleration") +
#  geom_line(aes(y = X), color="dark green", alpha = 0.8) +
#  geom_line(aes(y = Y), color="light blue", alpha = 0.8) +
#  geom_line(aes(y = Z), color="dark orange", alpha = 0.8) 
# Columns that are not needed for the time-series plot (the timestamp stays).
remove_col <- c("ID",  "Author", "Sample", "Orientation.X", "Orientation.Y", "Orientation.Z")
# NOTE(review): despite the "idle_" prefix this selects Tobias' Running rows.
idle_activity <- subset(motion_data, Category == "Running" & Author == "Tobias")
idle_activity <- idle_activity[, !names(idle_activity) %in% remove_col]

# motion_data is the raw combined frame, in which Acceleration.X is not
# numeric (converting it above raised a coercion warning). Convert the three
# axes here so geom_line() gets a continuous y instead of a discrete one.
acceleration_axes <- c("Acceleration.X", "Acceleration.Y", "Acceleration.Z")
idle_activity[acceleration_axes] <- lapply(
  idle_activity[acceleration_axes],
  as.numeric
)

#test <- scale_x_datetime(breaks = date_breaks("1 hours"), labels=date_format("%H:%m"), expand = c(0,0))
#test
# One line per acceleration axis over time. ggplot ignores dplyr groups, so
# the former group_by() in front of ggplot() was dropped.
idle_plot <- ggplot(idle_activity, aes(x = Acceleration.Timestamp)) +
  labs(x = "Timestamp", y = "Acceleration") +
  geom_line(aes(y = Acceleration.X), color = "dark green", alpha = 0.8) +
  geom_line(aes(y = Acceleration.Y), color = "light blue", alpha = 0.8) +
  geom_line(aes(y = Acceleration.Z), color = "dark orange", alpha = 0.8)
#ggplotly(idle_plot)

3. Train on whole data with selected features:

Train split: 80 %, Test split: 20 %

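Since the selected features correlate well and are relevant, the model below is trained on all six acceleration and angular-velocity features (the acceleration-only selection is commented out in the code).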

# Fix the RNG so the partition is reproducible.
set.seed(10)

# All remaining columns are used as features. The acceleration-only
# selection from the correlation analysis is currently disabled:
# motion_data_train[,c("Category", "Acceleration.X", "Acceleration.Y", "Acceleration.Z")]
feature_selection <- motion_data_train

# Stratified 80 / 20 split on the class label.
train_index_all <- createDataPartition(
  feature_selection$Category,
  p = 0.80,
  list = FALSE
)
train_data_all <- feature_selection[train_index_all, ]
test_data_all <- feature_selection[-train_index_all, ]

Accuracy on train data with rf: 81.56 % without orientation

# Seed for the resampling (original author's note: "6: 89.8 %").
set.seed(6)

# Random forest on all features, tuned with 4-fold cross-validation.
control_par <- trainControl(method = "cv", number = 4)
model_rf_all <- train(
  Category ~ .,
  data = train_data_all,
  method = "rf",
  trControl = control_par
)

model_rf_all
## Random Forest 
## 
## 6153 samples
##    6 predictor
##    4 classes: 'Idle', 'Lunges', 'Running', 'Siu' 
## 
## No pre-processing
## Resampling: Cross-Validated (4 fold) 
## Summary of sample sizes: 4614, 4615, 4615, 4615 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##   2     0.8090375  0.7439799
##   4     0.8039982  0.7372037
##   6     0.7970094  0.7278494
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.

Random forest with cross validation 4 fold

# Confusion matrix of the cross-validation resamples of the random forest.
cm_train_data <- confusionMatrix(model_rf_all)
cm_train_data
## Cross-Validated (4 fold) Confusion Matrix 
## 
## (entries are percentual average cell counts across resamples)
##  
##           Reference
## Prediction Idle Lunges Running  Siu
##    Idle    23.9    0.7     1.2  0.4
##    Lunges   0.8   16.3     1.0  2.3
##    Running  1.9    1.7    23.3  5.2
##    Siu      0.5    1.4     2.0 17.3
##                            
##  Accuracy (average) : 0.809

Accuracy on testing data with rf and cv: 83.37 % without orientation

set.seed(6)
# Predict the category of every row of the held-out 20 % split.
rf_all_pred_test <- predict(model_rf_all, newdata = test_data_all)

# Share of correct predictions, in percent.
accuracy_rf_test <- 100 * mean(rf_all_pred_test == test_data_all$Category)
accuracy_rf_test
## [1] 82.48698
# Full confusion matrix with per-class statistics.
cm_test_data <- confusionMatrix(
  data = rf_all_pred_test,
  reference = test_data_all$Category
)
cm_test_data
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Idle Lunges Running Siu
##    Idle     369      9      13   5
##    Lunges    14    253      12  41
##    Running   23     29     371  68
##    Siu        9     19      27 274
## 
## Overall Statistics
##                                           
##                Accuracy : 0.8249          
##                  95% CI : (0.8049, 0.8436)
##     No Information Rate : 0.2754          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.7653          
##                                           
##  Mcnemar's Test P-Value : 1.219e-06       
## 
## Statistics by Class:
## 
##                      Class: Idle Class: Lunges Class: Running Class: Siu
## Sensitivity               0.8892        0.8161         0.8771     0.7062
## Specificity               0.9759        0.9454         0.8922     0.9521
## Pos Pred Value            0.9318        0.7906         0.7556     0.8328
## Neg Pred Value            0.9596        0.9531         0.9502     0.9056
## Prevalence                0.2702        0.2018         0.2754     0.2526
## Detection Rate            0.2402        0.1647         0.2415     0.1784
## Detection Prevalence      0.2578        0.2083         0.3197     0.2142
## Balanced Accuracy         0.9325        0.8807         0.8846     0.8291
# Long-format confusion table (Prediction, Reference, Freq).
plt <- as.data.frame(cm_test_data$table)
# Reverse the prediction levels so the axis runs Siu -> Idle.
reversed_levels <- rev(levels(plt$Prediction))
plt$Prediction <- factor(plt$Prediction, levels = reversed_levels)

# Heat map of the confusion matrix with the counts printed in the tiles.
rf_conf_mat <- ggplot(plt, aes(x = Prediction, y = Reference, fill = Freq)) +
  geom_tile() +
  geom_text(aes(label = Freq)) +
  scale_fill_gradient(low = "white", high = "#009194") +
  scale_x_discrete(labels = c("Siu", "Running", "Lunges", "Idle")) +
  scale_y_discrete(labels = c("Idle", "Lunges", "Running", "Siu")) +
  labs(x = "Prediction", y = "Reference")


ggplotly(rf_conf_mat)

6. Now test the best model from Lunges on unknown data and compare accuracy

# Keep Sample this time: it identifies the individual recordings that are
# evaluated one by one below.
remove_col <- c("ID",  "Acceleration.Timestamp", "Author", "Orientation.X", "Orientation.Y", "Orientation.Z")
keep_cols <- !(names(motion_data_test) %in% remove_col)
motion_data_test <- motion_data_test[, keep_cols]

# Re-code the sample identifiers as consecutive integers 1..n.
sample_factor <- as.factor(motion_data_test$Sample)
motion_data_test$Sample <- as.numeric(sample_factor)

# Categories present in the held-out data.
unique(motion_data_test$Category)
## [1] Idle    Running Lunges  Siu    
## Levels: Idle Lunges Running Siu

Sample numbers per category: Idle: 1 - 10, Lunges: 11 - 20, Running: 21 - 30, Siu: 31 - 40

inspect(motion_data_test)
## 
## categorical variables:  
##       name  class levels    n missing
## 1 Category factor      4 1420       0
##                                    distribution
## 1 Running (47.6%), Lunges (22.1%) ...          
## 
## quantitative variables:  
##                name   class       min         Q1    median         Q3      max
## 1            Sample numeric   1.00000 13.0000000 22.000000 27.0000000 40.00000
## 2    Acceleration.X numeric -19.24533  5.6348475  8.989855 10.3458525 74.95678
## 3    Acceleration.Y numeric -62.43217 -2.5990100 -1.087490 -0.0446425 10.99254
## 4    Acceleration.Z numeric -27.55201 -1.3236225  1.985545  4.8890625 40.44529
## 5 AngularVelocity.X numeric  -7.90234 -0.5865550 -0.007295  0.5926225 12.85294
## 6 AngularVelocity.Y numeric  -7.73286 -0.2825200  0.029700  0.3547175 10.01106
## 7 AngularVelocity.Z numeric -12.65705 -0.2874675 -0.006055  0.2679275  7.92185
##          mean       sd    n missing
## 1 20.17323944 9.586790 1420       0
## 2  8.94612076 8.124970 1420       0
## 3 -2.02695663 5.929216 1420       0
## 4  1.79015740 5.362013 1420       0
## 5  0.03390892 1.698911 1420       0
## 6  0.05902677 1.120474 1420       0
## 7 -0.03541651 1.196526 1420       0

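Results per category (see the per-sample output below):

Idle: 6 / 10 samples are classified correctly by majority vote; the other four are predicted as Lunges

Lunges: 10 / 10 samples are classified correctly, each with at least 73 % of its rows correct

Running: 10 / 10 samples are classified correctly, each with at least 58 % of its rows correct

Siu: 9 / 10 samples are classified correctly; one sample is predicted as Running

In total, the average row-level accuracy per sample is about 68 %, and 87.5 % of the samples (35 / 40) are assigned the correct category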

# Evaluate the random forest on the held-out author, one recording (Sample)
# at a time. For each sample we record
#   * the row-level accuracy (share of rows predicted correctly), and
#   * whether the majority prediction equals the true category.
# NOTE(review): the per-sample frames are appended with append(), which
# flattens them into one long list of columns. This is kept unchanged so the
# resulting object stays the same; a list with one data frame per sample
# would probably be more useful.
list_motion_data_unknown <- c()

total_accuracy <- 0
sample_accuracy <- 0
correct_samples_rf <- 0

# Iterate over the sample ids that actually occur. This is safe for empty
# input and does not rely on the ids being exactly 1..n.
for (i in sort(unique(motion_data_test$Sample))) {
  motion_data_unknown <- subset(motion_data_test, Sample == i)
  # True labels of this sample.
  ref <- motion_data_unknown$Category
  motion_data_unknown <- motion_data_unknown[, !names(motion_data_unknown) %in% c("Sample")]

  # Copy with the labels blanked out, so the prediction cannot see them.
  motion_data_no_labels <- data.frame(motion_data_unknown)
  motion_data_no_labels$Category <- ""

  set.seed(6)
  ## Generate predictions
  rf_Lunges_pred_new <- predict(object = model_rf_all, newdata = motion_data_no_labels)

  ## Row-level accuracy of this sample, in percent
  accuracy <- mean(rf_Lunges_pred_new == motion_data_unknown$Category) * 100
  total_accuracy <- total_accuracy + accuracy

  # Store the predicted labels alongside the features.
  motion_data_no_labels$Category <- rf_Lunges_pred_new

  # Confusion matrix of prediction vs. true label. (It used to compare the
  # predictions with themselves, which is always a perfect diagonal.)
  cm_rf_all <- confusionMatrix(rf_Lunges_pred_new, ref)
  #print(cm_rf_all)

  # Majority vote: the most frequently predicted class (ties resolve to the
  # first factor level, as before).
  majority_prediction <- names(which.max(table(rf_Lunges_pred_new)))
  true_category <- as.character(unique(ref))
  # identical() stays a single TRUE/FALSE even if a sample unexpectedly
  # contains more than one category.
  if (identical(true_category, majority_prediction)) {
    correct_samples_rf <- correct_samples_rf + 1
  }

  print(paste("Reference: ", unique(ref), "Prediction: ", majority_prediction, "Accuracy: ", accuracy, sep = " "))

  list_motion_data_unknown <- append(list_motion_data_unknown, motion_data_no_labels)
}
## [1] "Reference:  Idle Prediction:  Lunges Accuracy:  45.4545454545455"
## [1] "Reference:  Idle Prediction:  Idle Accuracy:  66.6666666666667"
## [1] "Reference:  Idle Prediction:  Lunges Accuracy:  14.2857142857143"
## [1] "Reference:  Idle Prediction:  Idle Accuracy:  72"
## [1] "Reference:  Idle Prediction:  Idle Accuracy:  70"
## [1] "Reference:  Idle Prediction:  Idle Accuracy:  60"
## [1] "Reference:  Idle Prediction:  Lunges Accuracy:  45.1612903225806"
## [1] "Reference:  Idle Prediction:  Idle Accuracy:  61.2903225806452"
## [1] "Reference:  Idle Prediction:  Lunges Accuracy:  11.1111111111111"
## [1] "Reference:  Idle Prediction:  Idle Accuracy:  74.0740740740741"
## [1] "Reference:  Lunges Prediction:  Lunges Accuracy:  93.75"
## [1] "Reference:  Lunges Prediction:  Lunges Accuracy:  84.375"
## [1] "Reference:  Lunges Prediction:  Lunges Accuracy:  81.25"
## [1] "Reference:  Lunges Prediction:  Lunges Accuracy:  90.625"
## [1] "Reference:  Lunges Prediction:  Lunges Accuracy:  84.375"
## [1] "Reference:  Lunges Prediction:  Lunges Accuracy:  84.375"
## [1] "Reference:  Lunges Prediction:  Lunges Accuracy:  87.5"
## [1] "Reference:  Lunges Prediction:  Lunges Accuracy:  84.375"
## [1] "Reference:  Lunges Prediction:  Lunges Accuracy:  81.25"
## [1] "Reference:  Lunges Prediction:  Lunges Accuracy:  73.0769230769231"
## [1] "Reference:  Running Prediction:  Running Accuracy:  58.7301587301587"
## [1] "Reference:  Running Prediction:  Running Accuracy:  76.1194029850746"
## [1] "Reference:  Running Prediction:  Running Accuracy:  72.0588235294118"
## [1] "Reference:  Running Prediction:  Running Accuracy:  77.1084337349398"
## [1] "Reference:  Running Prediction:  Running Accuracy:  74.6666666666667"
## [1] "Reference:  Running Prediction:  Running Accuracy:  76.4705882352941"
## [1] "Reference:  Running Prediction:  Running Accuracy:  77.9411764705882"
## [1] "Reference:  Running Prediction:  Running Accuracy:  70.3125"
## [1] "Reference:  Running Prediction:  Running Accuracy:  66.1290322580645"
## [1] "Reference:  Running Prediction:  Running Accuracy:  58.6206896551724"
## [1] "Reference:  Siu Prediction:  Siu Accuracy:  55"
## [1] "Reference:  Siu Prediction:  Siu Accuracy:  65"
## [1] "Reference:  Siu Prediction:  Siu Accuracy:  76.4705882352941"
## [1] "Reference:  Siu Prediction:  Siu Accuracy:  73.3333333333333"
## [1] "Reference:  Siu Prediction:  Siu Accuracy:  81.25"
## [1] "Reference:  Siu Prediction:  Running Accuracy:  0"
## [1] "Reference:  Siu Prediction:  Siu Accuracy:  73.3333333333333"
## [1] "Reference:  Siu Prediction:  Siu Accuracy:  76.9230769230769"
## [1] "Reference:  Siu Prediction:  Siu Accuracy:  56.25"
## [1] "Reference:  Siu Prediction:  Siu Accuracy:  83.3333333333333"
# Size of the held-out data: rows and distinct samples.
n_rows_saghar <- nrow(subset(motion_data, Author == "Saghar"))
print(paste("Amount of rows for all samples in total: ", n_rows_saghar))
## [1] "Amount of rows for all samples in total:  1420"
n_samples_rf <- length(unique(motion_data_test$Sample))
print(paste("Amount of samples in total: ", n_samples_rf))
## [1] "Amount of samples in total:  40"
# Mean of the per-sample row-level accuracies (already in percent).
sample_accuracy_rf <- total_accuracy / n_samples_rf
print(paste("Accuracy over all samples: ", sample_accuracy_rf))
## [1] "Accuracy over all samples:  67.8511696249001"
# Share of samples whose majority prediction matches the true category.
avg_acc_rf <- correct_samples_rf / n_samples_rf
print(paste("Accuracy of correct samples: ", avg_acc_rf * 100))
## [1] "Accuracy of correct samples:  87.5"

Accuracy on train data with knn: 77.29 % without orientation

# Seed for the resampling (original author's note: "6: 89.8 %").
set.seed(6)

# k-nearest-neighbours on all features, tuned with 4-fold cross-validation
# and selected by accuracy.
control_par <- trainControl(method = "cv", number = 4)
model_knn <- train(
  Category ~ .,
  data = train_data_all,
  method = "knn",
  metric = "Accuracy",
  trControl = control_par
)

model_knn
## k-Nearest Neighbors 
## 
## 6153 samples
##    6 predictor
##    4 classes: 'Idle', 'Lunges', 'Running', 'Siu' 
## 
## No pre-processing
## Resampling: Cross-Validated (4 fold) 
## Summary of sample sizes: 4614, 4615, 4615, 4615 
## Resampling results across tuning parameters:
## 
##   k  Accuracy   Kappa    
##   5  0.7664556  0.6875408
##   7  0.7617434  0.6812002
##   9  0.7609308  0.6800328
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 5.

KNN with cross validation 4 fold

# Confusion matrix of the cross-validation resamples of the KNN model.
cm_train_data <- confusionMatrix(model_knn)
cm_train_data
## Cross-Validated (4 fold) Confusion Matrix 
## 
## (entries are percentual average cell counts across resamples)
##  
##           Reference
## Prediction Idle Lunges Running  Siu
##    Idle    24.2    1.6     3.2  1.6
##    Lunges   0.7   16.3     2.0  3.4
##    Running  1.5    1.1    19.8  3.9
##    Siu      0.6    1.2     2.5 16.4
##                             
##  Accuracy (average) : 0.7665

Accuracy on testing data with knn and cv: 82.07 % without orientation

set.seed(6)
# Predict the category of every row of the held-out 20 % split.
knn_all_pred_test <- predict(model_knn, newdata = test_data_all)

# Share of correct predictions, in percent.
accuracy_knn_test <- 100 * mean(knn_all_pred_test == test_data_all$Category)
accuracy_knn_test
## [1] 78.38542
# Full confusion matrix with per-class statistics.
cm_test_data <- confusionMatrix(
  data = knn_all_pred_test,
  reference = test_data_all$Category
)
cm_test_data
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Idle Lunges Running Siu
##    Idle     376     28      30  21
##    Lunges    12    235      31  49
##    Running   14     19     325  50
##    Siu       13     28      37 268
## 
## Overall Statistics
##                                           
##                Accuracy : 0.7839          
##                  95% CI : (0.7624, 0.8042)
##     No Information Rate : 0.2754          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.7106          
##                                           
##  Mcnemar's Test P-Value : 0.0003963       
## 
## Statistics by Class:
## 
##                      Class: Idle Class: Lunges Class: Running Class: Siu
## Sensitivity               0.9060        0.7581         0.7683     0.6907
## Specificity               0.9295        0.9250         0.9254     0.9321
## Pos Pred Value            0.8264        0.7187         0.7966     0.7746
## Neg Pred Value            0.9639        0.9380         0.9131     0.8992
## Prevalence                0.2702        0.2018         0.2754     0.2526
## Detection Rate            0.2448        0.1530         0.2116     0.1745
## Detection Prevalence      0.2962        0.2129         0.2656     0.2253
## Balanced Accuracy         0.9178        0.8415         0.8469     0.8114
# Long-format confusion counts (Prediction, Reference, Freq) for plotting.
plt <- as.data.frame(cm_test_data[["table"]])
# Reverse the order of the Prediction levels for the x axis.
plt[["Prediction"]] <- factor(
  plt[["Prediction"]],
  levels = rev(levels(plt[["Prediction"]]))
)

# Heat map of the confusion matrix with the cell counts printed on the tiles.
rf_conf_mat <- ggplot(
  data = plt,
  mapping = aes(x = Prediction, y = Reference, fill = Freq)
) +
  geom_tile() +
  geom_text(mapping = aes(label = Freq)) +
  scale_fill_gradient(low = "white", high = "#009194") +
  labs(x = "Prediction", y = "Reference") +
  scale_y_discrete(labels = c("Idle", "Lunges", "Running", "Siu")) +
  scale_x_discrete(labels = c("Siu", "Running", "Lunges", "Idle"))

# Convert the ggplot object into an interactive plotly widget.
ggplotly(rf_conf_mat)

6. Now test the best model from Lunges on unknown data and compare accuracy

# Columns dropped from the unseen data before prediction.
remove_col <- c(
  "ID", "Acceleration.Timestamp", "Author",
  "Orientation.X", "Orientation.Y", "Orientation.Z"
)
is_removed <- names(motion_data_test) %in% remove_col
motion_data_test <- motion_data_test[, !is_removed]
# Replace the sample identifiers by the numeric codes of their factor levels.
motion_data_test[["Sample"]] <- as.numeric(
  as.factor(motion_data_test[["Sample"]])
)

# Activity labels present in the unseen data.
unique(motion_data_test[["Category"]])
## [1] Idle    Running Lunges  Siu    
## Levels: Idle Lunges Running Siu

Sample IDs per activity: Idle: 1 - 10, Lunges: 11 - 20, Running: 21 - 30, Siu: 31 - 40

# Summarise the categorical and quantitative columns of the unseen data.
mosaic::inspect(motion_data_test)
## 
## categorical variables:  
##       name  class levels    n missing
## 1 Category factor      4 1420       0
##                                    distribution
## 1 Running (47.6%), Lunges (22.1%) ...          
## 
## quantitative variables:  
##                name   class       min         Q1    median         Q3      max
## 1            Sample numeric   1.00000 13.0000000 22.000000 27.0000000 40.00000
## 2    Acceleration.X numeric -19.24533  5.6348475  8.989855 10.3458525 74.95678
## 3    Acceleration.Y numeric -62.43217 -2.5990100 -1.087490 -0.0446425 10.99254
## 4    Acceleration.Z numeric -27.55201 -1.3236225  1.985545  4.8890625 40.44529
## 5 AngularVelocity.X numeric  -7.90234 -0.5865550 -0.007295  0.5926225 12.85294
## 6 AngularVelocity.Y numeric  -7.73286 -0.2825200  0.029700  0.3547175 10.01106
## 7 AngularVelocity.Z numeric -12.65705 -0.2874675 -0.006055  0.2679275  7.92185
##          mean       sd    n missing
## 1 20.17323944 9.586790 1420       0
## 2  8.94612076 8.124970 1420       0
## 3 -2.02695663 5.929216 1420       0
## 4  1.79015740 5.362013 1420       0
## 5  0.03390892 1.698911 1420       0
## 6  0.05902677 1.120474 1420       0
## 7 -0.03541651 1.196526 1420       0

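Lunges: 10 / 10 samples are classified correctly (per-sample accuracy between 53 % and 81 %)

Idle: 9 / 10 samples are classified correctly; 8 / 10 samples with at least 70 % correct

Running: 10 / 10 samples are classified correctly, each with at least 50 % correct

Siu: 10 / 10 samples are classified correctly; 8 / 10 samples with at least 50 % correct

In total we have an average per-sample accuracy of 69.8 %, and 39 / 40 samples (97.5 %) are classified correctly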

# Evaluate the KNN model sample by sample on the unseen recordings.
# For each sample: predict a class for every row, score the row-level accuracy
# against the true label, and take the most frequent predicted class as the
# sample-level prediction.
total_accuracy <- 0       # running sum of the per-sample accuracies (percent)
correct_samples_knn <- 0  # samples whose majority prediction equals the label
# Iterate over the sample ids that actually occur; this is also safe when the
# data set is empty (1:length(...) would then yield c(1, 0)).
for (i in sort(unique(motion_data_test$Sample))) {
  # Rows of the current sample; the Sample id itself is not a predictor.
  motion_data_unknown <- subset(motion_data_test, Sample == i)
  motion_data_unknown <- motion_data_unknown[
    , names(motion_data_unknown) != "Sample",
    drop = FALSE
  ]

  # A sample must carry exactly one activity label, otherwise the comparison
  # below is not a scalar condition.
  ref <- unique(as.character(motion_data_unknown$Category))
  if (length(ref) != 1) {
    stop(
      "Sample ", i, " does not have exactly one Category label",
      call. = FALSE
    )
  }

  # Hide the true labels from the model input.
  motion_data_no_labels <- motion_data_unknown[
    , names(motion_data_unknown) != "Category",
    drop = FALSE
  ]

  # NOTE(review): the seed is reset before every prediction, presumably to keep
  # any random tie-breaking inside predict() reproducible -- confirm.
  set.seed(6)
  knn_pred_new <- predict(object = model_knn, newdata = motion_data_no_labels)

  # Row-level accuracy (percent) for this sample.
  accuracy <- mean(
    as.character(knn_pred_new) == as.character(motion_data_unknown$Category)
  ) * 100
  total_accuracy <- total_accuracy + accuracy

  # Sample-level prediction: the most frequent predicted class. On ties
  # which.max() keeps the first class in factor-level order.
  predicted_class <- names(which.max(table(knn_pred_new)))
  if (identical(ref, predicted_class)) {
    correct_samples_knn <- correct_samples_knn + 1
  }

  print(paste("Reference: ", ref, "Prediction: ", predicted_class, "Accuracy: ", accuracy, sep = " "))
}
## [1] "Reference:  Idle Prediction:  Idle Accuracy:  63.6363636363636"
## [1] "Reference:  Idle Prediction:  Idle Accuracy:  93.3333333333333"
## [1] "Reference:  Idle Prediction:  Lunges Accuracy:  33.3333333333333"
## [1] "Reference:  Idle Prediction:  Idle Accuracy:  92"
## [1] "Reference:  Idle Prediction:  Idle Accuracy:  93.3333333333333"
## [1] "Reference:  Idle Prediction:  Idle Accuracy:  73.3333333333333"
## [1] "Reference:  Idle Prediction:  Idle Accuracy:  87.0967741935484"
## [1] "Reference:  Idle Prediction:  Idle Accuracy:  83.8709677419355"
## [1] "Reference:  Idle Prediction:  Idle Accuracy:  92.5925925925926"
## [1] "Reference:  Idle Prediction:  Idle Accuracy:  96.2962962962963"
## [1] "Reference:  Lunges Prediction:  Lunges Accuracy:  78.125"
## [1] "Reference:  Lunges Prediction:  Lunges Accuracy:  71.875"
## [1] "Reference:  Lunges Prediction:  Lunges Accuracy:  65.625"
## [1] "Reference:  Lunges Prediction:  Lunges Accuracy:  75"
## [1] "Reference:  Lunges Prediction:  Lunges Accuracy:  75"
## [1] "Reference:  Lunges Prediction:  Lunges Accuracy:  68.75"
## [1] "Reference:  Lunges Prediction:  Lunges Accuracy:  81.25"
## [1] "Reference:  Lunges Prediction:  Lunges Accuracy:  53.125"
## [1] "Reference:  Lunges Prediction:  Lunges Accuracy:  78.125"
## [1] "Reference:  Lunges Prediction:  Lunges Accuracy:  73.0769230769231"
## [1] "Reference:  Running Prediction:  Running Accuracy:  60.3174603174603"
## [1] "Reference:  Running Prediction:  Running Accuracy:  68.6567164179104"
## [1] "Reference:  Running Prediction:  Running Accuracy:  76.4705882352941"
## [1] "Reference:  Running Prediction:  Running Accuracy:  72.289156626506"
## [1] "Reference:  Running Prediction:  Running Accuracy:  78.6666666666667"
## [1] "Reference:  Running Prediction:  Running Accuracy:  69.1176470588235"
## [1] "Reference:  Running Prediction:  Running Accuracy:  67.6470588235294"
## [1] "Reference:  Running Prediction:  Running Accuracy:  59.375"
## [1] "Reference:  Running Prediction:  Running Accuracy:  64.5161290322581"
## [1] "Reference:  Running Prediction:  Running Accuracy:  53.448275862069"
## [1] "Reference:  Siu Prediction:  Siu Accuracy:  50"
## [1] "Reference:  Siu Prediction:  Siu Accuracy:  50"
## [1] "Reference:  Siu Prediction:  Siu Accuracy:  64.7058823529412"
## [1] "Reference:  Siu Prediction:  Siu Accuracy:  40"
## [1] "Reference:  Siu Prediction:  Siu Accuracy:  56.25"
## [1] "Reference:  Siu Prediction:  Siu Accuracy:  100"
## [1] "Reference:  Siu Prediction:  Siu Accuracy:  53.3333333333333"
## [1] "Reference:  Siu Prediction:  Siu Accuracy:  69.2307692307692"
## [1] "Reference:  Siu Prediction:  Siu Accuracy:  43.75"
## [1] "Reference:  Siu Prediction:  Siu Accuracy:  66.6666666666667"
# Size of the unseen data: rows recorded by author "Saghar" and sample count.
saghar_row_count <- nrow(subset(motion_data, subset = Author == "Saghar"))
print(paste("Amount of rows for all samples in total: ", saghar_row_count, sep = " "))
## [1] "Amount of rows for all samples in total:  1420"
n_samples_knn <- length(unique(motion_data_test[["Sample"]]))
print(paste("Amount of samples in total: ", n_samples_knn, sep = " "))
## [1] "Amount of samples in total:  40"
# Mean of the per-sample row accuracies (percent).
sample_accuracy_knn <- total_accuracy / n_samples_knn
print(paste("Accuracy over all samples: ", sample_accuracy_knn, sep = " "))
## [1] "Accuracy over all samples:  69.8304900373805"
# Fraction of samples whose majority prediction matched the label.
avg_acc_knn <- correct_samples_knn / n_samples_knn
print(paste("Accuracy of correct samples: ", avg_acc_knn * 100, sep = " "))
## [1] "Accuracy of correct samples:  97.5"

Accuracy on train data with rpart: 55.27 % (without orientation features)

# Fit a CART model (rpart) on the training split with 4-fold cross-validation,
# selecting the tuning candidate by accuracy.
set.seed(6)
control_par <- trainControl(method = "cv", number = 4)
model_rpart <- train(
  Category ~ .,
  data = train_data_all,
  method = "rpart",
  metric = "Accuracy",
  trControl = control_par
)

# Show the resampling results for the tuning candidates.
model_rpart
## CART 
## 
## 6153 samples
##    6 predictor
##    4 classes: 'Idle', 'Lunges', 'Running', 'Siu' 
## 
## No pre-processing
## Resampling: Cross-Validated (4 fold) 
## Summary of sample sizes: 4614, 4615, 4615, 4615 
## Resampling results across tuning parameters:
## 
##   cp          Accuracy   Kappa    
##   0.09147982  0.5527355  0.3906444
##   0.10448430  0.4700199  0.2788876
##   0.21771300  0.3526860  0.1124208
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was cp = 0.09147982.
# Basic plot for a decision tree.
# `branch` is a numeric shape parameter of the rpart plot method, so pass 1
# explicitly instead of the reassignable shorthand `T` (which was only being
# coerced to 1).
plot(model_rpart$finalModel, branch = 1, margin = 0.1)
# Label the nodes of the plotted tree.
text(model_rpart$finalModel)

Rpart with 4-fold cross-validation

# Confusion matrix of the rpart model, averaged over the cross-validation
# resamples of the fitted train object.
cm_train_data <- caret::confusionMatrix(data = model_rpart)
cm_train_data
## Cross-Validated (4 fold) Confusion Matrix 
## 
## (entries are percentual average cell counts across resamples)
##  
##           Reference
## Prediction Idle Lunges Running  Siu
##    Idle    23.4    5.7     8.2  3.6
##    Lunges   0.0    0.0     0.0  0.0
##    Running  2.7    4.7    14.7  4.6
##    Siu      0.9    9.8     4.6 17.1
##                             
##  Accuracy (average) : 0.5527

Accuracy on test data with rpart and 4-fold CV: 50.39 % (without orientation features)

# Fix the RNG state, then predict a class for every row of the test split.
set.seed(6)
rpart_all_pred_test <- predict(object = model_rpart, newdata = test_data_all)

# Share of test rows whose predicted class equals the true class, in percent.
accuracy_rpart_test <- 100 * mean(
  rpart_all_pred_test == test_data_all[["Category"]]
)
accuracy_rpart_test
## [1] 50.39062
# Confusion matrix and per-class statistics on the test split.
cm_test_data <- confusionMatrix(
  data = rpart_all_pred_test,
  reference = test_data_all[["Category"]]
)
cm_test_data
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Idle Lunges Running Siu
##    Idle     387    135     225  93
##    Lunges     0      0       0   0
##    Running   14     24     121  29
##    Siu       14    151      77 266
## 
## Overall Statistics
##                                           
##                Accuracy : 0.5039          
##                  95% CI : (0.4786, 0.5292)
##     No Information Rate : 0.2754          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.325           
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
## 
## Statistics by Class:
## 
##                      Class: Idle Class: Lunges Class: Running Class: Siu
## Sensitivity               0.9325        0.0000        0.28605     0.6856
## Specificity               0.5959        1.0000        0.93980     0.7892
## Pos Pred Value            0.4607           NaN        0.64362     0.5236
## Neg Pred Value            0.9598        0.7982        0.77596     0.8813
## Prevalence                0.2702        0.2018        0.27539     0.2526
## Detection Rate            0.2520        0.0000        0.07878     0.1732
## Detection Prevalence      0.5469        0.0000        0.12240     0.3307
## Balanced Accuracy         0.7642        0.5000        0.61293     0.7374
# Long-format confusion counts (Prediction, Reference, Freq) for plotting.
plt <- as.data.frame(cm_test_data[["table"]])
# Reverse the order of the Prediction levels for the x axis.
plt[["Prediction"]] <- factor(
  plt[["Prediction"]],
  levels = rev(levels(plt[["Prediction"]]))
)

# Heat map of the confusion matrix with the cell counts printed on the tiles.
rf_conf_mat <- ggplot(
  data = plt,
  mapping = aes(x = Prediction, y = Reference, fill = Freq)
) +
  geom_tile() +
  geom_text(mapping = aes(label = Freq)) +
  scale_fill_gradient(low = "white", high = "#009194") +
  labs(x = "Prediction", y = "Reference") +
  scale_y_discrete(labels = c("Idle", "Lunges", "Running", "Siu")) +
  scale_x_discrete(labels = c("Siu", "Running", "Lunges", "Idle"))

# Convert the ggplot object into an interactive plotly widget.
ggplotly(rf_conf_mat)

6. Now test the rpart model on unknown data and compare accuracy

# Columns dropped from the unseen data before prediction.
remove_col <- c(
  "ID", "Acceleration.Timestamp", "Author",
  "Orientation.X", "Orientation.Y", "Orientation.Z"
)
is_removed <- names(motion_data_test) %in% remove_col
motion_data_test <- motion_data_test[, !is_removed]
# Replace the sample identifiers by the numeric codes of their factor levels.
motion_data_test[["Sample"]] <- as.numeric(
  as.factor(motion_data_test[["Sample"]])
)

# Activity labels present in the unseen data.
unique(motion_data_test[["Category"]])
## [1] Idle    Running Lunges  Siu    
## Levels: Idle Lunges Running Siu

Sample IDs per activity: Idle: 1 - 10, Lunges: 11 - 20, Running: 21 - 30, Siu: 31 - 40

# Summarise the categorical and quantitative columns of the unseen data.
mosaic::inspect(motion_data_test)
## 
## categorical variables:  
##       name  class levels    n missing
## 1 Category factor      4 1420       0
##                                    distribution
## 1 Running (47.6%), Lunges (22.1%) ...          
## 
## quantitative variables:  
##                name   class       min         Q1    median         Q3      max
## 1            Sample numeric   1.00000 13.0000000 22.000000 27.0000000 40.00000
## 2    Acceleration.X numeric -19.24533  5.6348475  8.989855 10.3458525 74.95678
## 3    Acceleration.Y numeric -62.43217 -2.5990100 -1.087490 -0.0446425 10.99254
## 4    Acceleration.Z numeric -27.55201 -1.3236225  1.985545  4.8890625 40.44529
## 5 AngularVelocity.X numeric  -7.90234 -0.5865550 -0.007295  0.5926225 12.85294
## 6 AngularVelocity.Y numeric  -7.73286 -0.2825200  0.029700  0.3547175 10.01106
## 7 AngularVelocity.Z numeric -12.65705 -0.2874675 -0.006055  0.2679275  7.92185
##          mean       sd    n missing
## 1 20.17323944 9.586790 1420       0
## 2  8.94612076 8.124970 1420       0
## 3 -2.02695663 5.929216 1420       0
## 4  1.79015740 5.362013 1420       0
## 5  0.03390892 1.698911 1420       0
## 6  0.05902677 1.120474 1420       0
## 7 -0.03541651 1.196526 1420       0

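Lunges is not recognized at all: 10 / 10 samples are misclassified

Idle: 10 / 10 samples are classified correctly; 9 / 10 samples with at least 70 % correct

Running: only 1 / 10 samples is classified correctly; no sample reaches 40 % correct

Siu: 9 / 10 samples with at least 50 % correct

In total we have an average per-sample accuracy of 48.1 %, and 20 / 40 samples (50 %) are classified correctly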

# Evaluate the rpart model sample by sample on the unseen recordings.
# For each sample: predict a class for every row, score the row-level accuracy
# against the true label, and take the most frequent predicted class as the
# sample-level prediction.
total_accuracy <- 0         # running sum of the per-sample accuracies (percent)
correct_samples_rpart <- 0  # samples whose majority prediction equals the label

# Iterate over the sample ids that actually occur; this is also safe when the
# data set is empty (1:length(...) would then yield c(1, 0)).
for (i in sort(unique(motion_data_test$Sample))) {
  # Rows of the current sample; the Sample id itself is not a predictor.
  motion_data_unknown <- subset(motion_data_test, Sample == i)
  motion_data_unknown <- motion_data_unknown[
    , names(motion_data_unknown) != "Sample",
    drop = FALSE
  ]

  # A sample must carry exactly one activity label, otherwise the comparison
  # below is not a scalar condition.
  ref <- unique(as.character(motion_data_unknown$Category))
  if (length(ref) != 1) {
    stop(
      "Sample ", i, " does not have exactly one Category label",
      call. = FALSE
    )
  }

  # Hide the true labels from the model input.
  motion_data_no_labels <- motion_data_unknown[
    , names(motion_data_unknown) != "Category",
    drop = FALSE
  ]

  # NOTE(review): the seed is reset before every prediction as in the other
  # model sections; it is unclear whether this predict() call uses the RNG --
  # confirm before removing.
  set.seed(6)
  rpart_pred_new <- predict(
    object = model_rpart,
    newdata = motion_data_no_labels
  )

  # Row-level accuracy (percent) for this sample.
  accuracy <- mean(
    as.character(rpart_pred_new) == as.character(motion_data_unknown$Category)
  ) * 100
  total_accuracy <- total_accuracy + accuracy

  # Sample-level prediction: the most frequent predicted class. On ties
  # which.max() keeps the first class in factor-level order.
  predicted_class <- names(which.max(table(rpart_pred_new)))
  if (identical(ref, predicted_class)) {
    correct_samples_rpart <- correct_samples_rpart + 1
  }

  print(paste("Reference: ", ref, "Prediction: ", predicted_class, "Accuracy: ", accuracy, sep = " "))
}
## [1] "Reference:  Idle Prediction:  Idle Accuracy:  93.9393939393939"
## [1] "Reference:  Idle Prediction:  Idle Accuracy:  93.3333333333333"
## [1] "Reference:  Idle Prediction:  Idle Accuracy:  85.7142857142857"
## [1] "Reference:  Idle Prediction:  Idle Accuracy:  96"
## [1] "Reference:  Idle Prediction:  Idle Accuracy:  96.6666666666667"
## [1] "Reference:  Idle Prediction:  Idle Accuracy:  93.3333333333333"
## [1] "Reference:  Idle Prediction:  Idle Accuracy:  77.4193548387097"
## [1] "Reference:  Idle Prediction:  Idle Accuracy:  96.7741935483871"
## [1] "Reference:  Idle Prediction:  Idle Accuracy:  66.6666666666667"
## [1] "Reference:  Idle Prediction:  Idle Accuracy:  92.5925925925926"
## [1] "Reference:  Lunges Prediction:  Siu Accuracy:  0"
## [1] "Reference:  Lunges Prediction:  Siu Accuracy:  0"
## [1] "Reference:  Lunges Prediction:  Idle Accuracy:  0"
## [1] "Reference:  Lunges Prediction:  Idle Accuracy:  0"
## [1] "Reference:  Lunges Prediction:  Siu Accuracy:  0"
## [1] "Reference:  Lunges Prediction:  Idle Accuracy:  0"
## [1] "Reference:  Lunges Prediction:  Idle Accuracy:  0"
## [1] "Reference:  Lunges Prediction:  Idle Accuracy:  0"
## [1] "Reference:  Lunges Prediction:  Siu Accuracy:  0"
## [1] "Reference:  Lunges Prediction:  Idle Accuracy:  0"
## [1] "Reference:  Running Prediction:  Running Accuracy:  39.6825396825397"
## [1] "Reference:  Running Prediction:  Siu Accuracy:  23.8805970149254"
## [1] "Reference:  Running Prediction:  Siu Accuracy:  25"
## [1] "Reference:  Running Prediction:  Siu Accuracy:  28.9156626506024"
## [1] "Reference:  Running Prediction:  Siu Accuracy:  32"
## [1] "Reference:  Running Prediction:  Idle Accuracy:  32.3529411764706"
## [1] "Reference:  Running Prediction:  Siu Accuracy:  29.4117647058824"
## [1] "Reference:  Running Prediction:  Siu Accuracy:  34.375"
## [1] "Reference:  Running Prediction:  Siu Accuracy:  33.8709677419355"
## [1] "Reference:  Running Prediction:  Siu Accuracy:  31.0344827586207"
## [1] "Reference:  Siu Prediction:  Siu Accuracy:  55"
## [1] "Reference:  Siu Prediction:  Siu Accuracy:  65"
## [1] "Reference:  Siu Prediction:  Siu Accuracy:  82.3529411764706"
## [1] "Reference:  Siu Prediction:  Siu Accuracy:  80"
## [1] "Reference:  Siu Prediction:  Siu Accuracy:  93.75"
## [1] "Reference:  Siu Prediction:  Running Accuracy:  0"
## [1] "Reference:  Siu Prediction:  Siu Accuracy:  80"
## [1] "Reference:  Siu Prediction:  Siu Accuracy:  92.3076923076923"
## [1] "Reference:  Siu Prediction:  Siu Accuracy:  81.25"
## [1] "Reference:  Siu Prediction:  Siu Accuracy:  91.6666666666667"
# Size of the unseen data: rows recorded by author "Saghar" and sample count.
saghar_row_count <- nrow(subset(motion_data, subset = Author == "Saghar"))
print(paste("Amount of rows for all samples in total: ", saghar_row_count, sep = " "))
## [1] "Amount of rows for all samples in total:  1420"
n_samples_rpart <- length(unique(motion_data_test[["Sample"]]))
print(paste("Amount of samples in total: ", n_samples_rpart, sep = " "))
## [1] "Amount of samples in total:  40"
# Mean of the per-sample row accuracies (percent).
sample_accuracy_rpart <- total_accuracy / n_samples_rpart
print(paste("Accuracy over all samples: ", sample_accuracy_rpart, sep = " "))
## [1] "Accuracy over all samples:  48.1072769128794"
# Fraction of samples whose majority prediction matched the label.
avg_acc_rpart <- correct_samples_rpart / n_samples_rpart
print(paste("Accuracy of correct samples: ", avg_acc_rpart * 100, sep = " "))
## [1] "Accuracy of correct samples:  50"

Model results:

# Summary table comparing the three models on training (cross-validation)
# accuracy, accuracy on the unseen samples, and correctly classified samples.
n_test_samples <- length(unique(motion_data_test$Sample))

# "<correct> out of <total>" labels. paste() already separates its arguments
# with a single space, so the literal carries no padding of its own.
rf <- paste(correct_samples_rf, "out of", n_test_samples)
knn <- paste(correct_samples_knn, "out of", n_test_samples)
rpart <- paste(correct_samples_rpart, "out of", n_test_samples)

# Cross-validated accuracy (percent, one decimal) of the best tuning candidate.
# `results` holds one row per candidate; averaging over all rows would mix in
# the candidates that were discarded in favour of the most accurate one.
# NOTE(review): assumes model_rf_all was also selected on largest Accuracy, as
# the KNN and rpart models were -- confirm.
best_cv_accuracy <- function(model) {
  round(max(model$results$Accuracy) * 100, 1)
}

results_models <- data.frame(
  Model = c("Random_forest", "KNN", "Rpart"),
  Train = vapply(
    list(model_rf_all, model_knn, model_rpart),
    best_cv_accuracy,
    numeric(1)
  ),
  # Mean per-sample accuracy (percent) on the unseen data.
  Test = c(sample_accuracy_rf, sample_accuracy_knn, sample_accuracy_rpart),
  # Empty spacer column between the accuracies and the sample counts.
  "." = c("", "", ""),
  Correct_Samples_Pred = c(rf, knn, rpart)
)

results_models

Rpart results:

# Per-category summary of the rpart sample-level predictions on the unseen
# data. Counts follow the per-sample log of the rpart evaluation loop:
# Idle 10 / 10, Running 1 / 10, Lunges 0 / 10, Siu 9 / 10 (20 of 40 in total).
results_rpart <- data.frame(
  Category = c("Idle", "Running", "Lunge", "Siu"),
  Nr_Samples = c(10, 10, 10, 10),
  # Order matches Category above: Running has one correct sample, Lunge none.
  Correct_Pred = c(10, 1, 0, 9)
)

results_rpart

Random forest results:

# Per-category summary of the random forest sample-level predictions:
# number of samples and number of correctly predicted samples per activity.
results_randomforest <- data.frame(
  Category = c("Idle", "Running", "Lunge", "Siu")
)
results_randomforest$Nr_Samples <- rep(10, times = 4)
results_randomforest$Correct_Pred <- c(6, 10, 10, 9)

results_randomforest

KNN results:

# Per-category summary of the KNN sample-level predictions:
# number of samples and number of correctly predicted samples per activity.
results_knn <- data.frame(
  Category = c("Idle", "Running", "Lunge", "Siu")
)
results_knn$Nr_Samples <- rep(10, times = 4)
results_knn$Correct_Pred <- c(9, 10, 10, 10)

results_knn